Loading in data

All following graphs and statistics are based on the results of the Limesurvey online pre-test. 40 participants filled out the online pre-test; groups of 20 saw the same sentences, and each participant saw them in a different pseudo-randomized order.

# Load stimulus definitions: semicolon-separated, UTF-8; "" and "NAN" become NA.
# NOTE(review): "head=" relies on partial matching of "header=" -- works, but
# spelling it out would be safer.
stimuli         <- read.csv(file="allstimuli_new.csv",head=TRUE,sep=";",na.strings = c("","NAN"), stringsAsFactors = FALSE,encoding="UTF-8")
# Unique item ID built from column 13 (Attachment, "N"/"V") + column 3 (ID),
# e.g. "NA249" -- matches the item column names in the response files.
stimuli$fullID  <- as.character(interaction(stimuli[,c(13,3)],sep = ""))
stimuli$Condition <- as.factor(stimuli$Condition)
colnames(stimuli) <- c("Type","Pair","ID","Det.","N0","Verb","Det.2","N1","Prep.","Det.3","Adj.","N2","Attachment","Unambiguous","verb_number","fullID")
#load response file
# One "results*" CSV per export in allresponses/; merge them row-wise
# (merge with all=TRUE keeps every column that appears in any file).
file_names = list.files(pattern="^results",path = "allresponses",full.names = TRUE)
temp <- lapply(file_names,read.csv,sep=",",na.strings = c("","NAN"), stringsAsFactors = TRUE)
df.responses <- Reduce(function(x,y) merge(x,y,all=TRUE,sort=TRUE),temp)
#transpose and fix column names and classes
# setDT converts in place; keep.rownames=TRUE moves row names into column "rn",
# which is renamed to "subjects" below (presumably row indices serve as
# subject identifiers -- TODO confirm against the export format).
df.responses                <- setDT(df.responses,keep.rownames = TRUE)
colnames(df.responses)[1]   <- "subjects"
df.responses                <- Filter(function(x) !(all(x=="")), df.responses) #delete blank columns

Data were subsequently split into subject info (demographics etc.) and responses to items.

#Split all item specific data from subject specific data
# Item columns are named "[VN]A<digits>" with optional suffixes ("...Time",
# "P...Time", trailing "."); this grep catches all of them.
ind_items   <- grep("[VN]A[0-9]",colnames(df.responses),value = TRUE)
df.subjectinfo <- df.responses
df.subjectinfo[,ind_items] <- NULL
df.responses  <- df.responses[,c("subjects",ind_items),with=FALSE]
#reshape items so that each row contains info for items per subject (items repeat over rows)
# Four parallel melts, one per measure:
# 1) attachment choice: columns named exactly "[VN]A<digits>"
ind             <- grep("^[VN]A[0-9]*$",colnames(df.responses),value = TRUE)
df.respAttach   <- df.responses[,c("subjects",ind),with=FALSE]
df.respAttach   <- melt(df.respAttach,id="subjects",value.name="response_attachment",variable.name='items')
# 2) reaction time for the attachment choice
ind             <- grep("^[VN]A[0-9]*Time$",colnames(df.responses),value = TRUE)
df.respTime     <- df.responses[,c("subjects",ind),with=FALSE]
df.respTime     <- melt(df.respTime,id="subjects",value.name="rt_attachment",variable.name='items')
# 3) plausibility rating: columns ending in "."
ind             <- grep("\\.$",colnames(df.responses),value = TRUE)
df.respPAttach  <- df.responses[,c("subjects",ind),with=FALSE]
df.respPAttach  <- melt(df.respPAttach,id="subjects",value.name="rating_plausibility",variable.name='items')
# 4) reaction time for the plausibility rating ("P...Time" columns)
ind             <- grep("P[VN]A[0-9]*Time$",colnames(df.responses),value = TRUE)
df.respPTime    <- df.responses[,c("subjects",ind),with=FALSE]
df.respPTime    <- melt(df.respPTime,id="subjects",value.name="rt_plausibility",variable.name='items')
# NOTE(review): cbind assumes all four melts yield rows in the same
# subject/item order. That holds as long as the four greps list items in the
# same order -- verify before reordering any of the blocks above.
df.responses    <- cbind(df.respAttach,df.respTime[,3],df.respPAttach[,3],df.respPTime[,3])
# A "hit" is a response matching the attachment encoded in the item name:
# "N..." items expect "Nomen", "V..." items expect "Verb".
df.responses$hits <- as.numeric((grepl("N",df.responses$items) & df.responses$response_attachment == "Nomen") | 
                      (grepl("V",df.responses$items) & df.responses$response_attachment == "Verb"))
df.responses    <- df.responses %>%
                    mutate(attachment = ifelse(grepl("N",items),"Noun","Verb"))

Summary Statistics

Age of participants:

# Five-number summary + mean of participant age (years)
summary(df.subjectinfo$age)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   22.00   25.00   25.16   27.00   33.00

Time taken to complete pre-test (in minutes):

# interviewtime is recorded in seconds; convert to minutes for readability
summary((df.subjectinfo$interviewtime)/60)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   23.36   31.54   33.73   36.57   40.16   80.20

1. Outlier

Outlier subjects were identified according to three scores:
1. Subjects who have more than one incorrect test item
2. Subjects who have less than 60% correct for the unambiguous items
3. Subjects whose average reaction time diverges extremely from the average

1a. Unambiguous sentences:

#average accuracy of unambiguous items
# Pick out the unambiguous control items: their full IDs (for filtering
# responses later) and their sentence columns (Det. through N2) for display.
is_unambiguous        <- stimuli$Unambiguous == 1
subset_unambiguous    <- stimuli$fullID[is_unambiguous]
sentences_unambiguous <- stimuli[is_unambiguous, 4:12]
sentences_unambiguous

1b. Reaction Times

# Outlier criteria: fewer than `least_correct` correct test items, or
# below `threshhold_acc` accuracy on the unambiguous items.
# (NOTE(review): "threshhold" is misspelled, but the name is reused at the
# outlier-removal step below -- rename both together or not at all.)
least_correct = 3
threshhold_acc = 0.60
# Mean attachment RT per subject, across all items
avg_rts <- df.responses %>%
                        group_by(subjects) %>%
                        summarise(mean_rt = mean(rt_attachment,na.rm=TRUE)) 

# RT outliers = points beyond the boxplot whiskers (1.5 * IQR rule)
outlier <- boxplot.stats(avg_rts$mean_rt)$out
p <- ggplot(avg_rts, aes(x=factor(0),y=mean_rt,subject=subjects)) +
    geom_boxplot() +
    geom_jitter(size = 2) +
    ggtitle("Average reaction times per subject across all items")
p <- ggplotly(p,tooltip ='subjects')
p
# Number of correct test items per subject: non-NA entries in the
# "correct*" columns are counted as correct.
test_correct   <- df.subjectinfo %>%
                   select(subjects,starts_with("correct")) %>%
                      mutate(number_correct = rowSums(!is.na(.[,2:ncol(.)]))) %>%
                        select(subjects,number_correct)

# Per-subject accuracy on the unambiguous control items
avg_unambiguous <-df.responses %>%
                    filter(items %in% subset_unambiguous) %>%
                      group_by(subjects) %>%
                        summarise(acc_unambiguous = mean(hits,na.rm=TRUE))

1c. Summary table per subject

# Combine the three outlier scores into one table per subject (plyr::join_all)
summ <- join_all(list(avg_rts,test_correct,avg_unambiguous),by='subjects')
DT::datatable(summ) %>% formatRound(columns=colnames(summ),digits=2)
# Match RT outliers back to subjects; rounding to 2 digits on both sides
# avoids floating-point mismatches between boxplot.stats output and the data
outlier_indx <- avg_rts$subjects[which(round(avg_rts$mean_rt,digits=2) %in% round(outlier,digits=2))]
# Add subjects failing either accuracy criterion, then de-duplicate
outlier_indx <- c(outlier_indx,summ$subjects[which(summ$number_correct < least_correct | summ$acc_unambiguous < threshhold_acc)])
outlier_indx <- unique(outlier_indx)
#Remove Outlier
df.responses <- df.responses %>%
                filter(!(subjects %in% outlier_indx))

Number of removed outliers: 2

2. Assess items

#Binomial distribution of probabilities if chance-performance is 50%
# Reference distribution: 20 responses per item, so under pure guessing the
# number of correct answers per item is Binomial(20, 0.5).
x <- seq(0,20,by = 1)
y <- dbinom(x,20,0.5)
#plot(x,y,main="Probability distribution assuming per item probability of 50%",
#      xlab="Number of correct answers per items (out of 20 answers)", ylab = "Probability")

Reject stimuli with too few correct responses (fewer than 72% expected answers)

2a. Rejected items:

# Items with accuracy below this fraction are rejected
threshold = 0.72

# Per-item accuracy and mean plausibility rating
# NOTE(review): response files were read with stringsAsFactors=TRUE; if
# rating_plausibility came in as a factor, mean() yields NA -- confirm the
# ratings column is numeric at this point.
acc_per_item <- df.responses %>%
                  group_by(items) %>%
                    summarise(acc = mean(hits,na.rm = TRUE), plaus = mean(rating_plausibility,na.rm = TRUE))
mean_accuracy <- acc_per_item %>%
                    summarise(mean_correct = mean(acc,na.rm = TRUE))

# Attach accuracy/plausibility to the stimulus table (fullID <-> items)
df.stim_acc <- merge(stimuli,acc_per_item,by.x="fullID",by.y="items")
df.stim_acc$acc <- round(df.stim_acc$acc,digits=2)
df.stim_acc$plaus <- round(df.stim_acc$plaus,digits=2)

# Items falling below the accuracy threshold, sorted by verb for inspection
items_reject <- df.stim_acc %>% arrange(Verb) %>% filter(acc < threshold)
                                                           
  #datatable(items_reject[,c(1,2,4,6:13,17)],
  #          filter = 'top',
  #          options = list("pageLength" = 10))
  datatable(items_reject[,c(1,2,4,6:13,17,18)])

2b. Remaining items:

#first reject items based on accuracy threshold with their paired items
# A rejected item drags its counterpart (same ID) out too, keeping pairs intact
pairs_reject <- df.stim_acc[df.stim_acc$ID %in% items_reject$ID,]
df.responses_clean <- df.responses %>%
                filter(!(items %in% pairs_reject$fullID))
#Then check if all verbs still occur twice (also for type 1 items) and reject remaining pair.
pairs_remain <- df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,]
# verb_numbers still represented at least twice among the surviving items
pairs_remain <-  unique(pairs_remain[duplicated(pairs_remain$verb_number),"verb_number"])
items_reject2 <- df.stim_acc[!(df.stim_acc$verb_number %in% pairs_remain),]
pairs_reject2 <- df.stim_acc[df.stim_acc$ID %in% items_reject2$ID,]
df.responses_clean <- df.responses_clean %>%
                filter(!(items %in% pairs_reject2$fullID))
# BUG FIX: the column is "items", not "item". "$item" returns NULL on a
# data.table (no partial matching), which made the %in% all-FALSE and the
# table render empty.
datatable(df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,c(1,2,4,6:13,17,18)])
#manually reject superfluous items
items_reject3 = c("VA249","NA249","NA21","VA21","NA243","VA243","NA2104","VA2104","NA247","VA247","NA288","VA288","NA255","VA255","NA237","VA237","NA22","VA22","NA25","VA25","NA24","VA24","NA272","VA272","NA229","VA229","NA286","VA286","NA287","VA287","NA275","VA275","NA278","VA278","NA253","VA253","NA120","VA120","NA133","VA133","NA125","VA125","NA183","VA183","NA188","VA188","NA1101","VA1101","NA147","VA147")
df.responses_clean <- df.responses_clean %>%
                filter(!(items %in% items_reject3))
datatable(df.stim_acc[df.stim_acc$fullID %in% df.responses_clean$items,c(1,2,4,6:13,17,18)])

There are 100 items of Type 1 left
There are 98 items of Type 2 left

3. Biases

3a. Accuracy difference within pairs

# Recompute per-item accuracy on the cleaned data only
acc_per_item <- df.responses_clean %>%
                  group_by(items) %>%
                    summarise(acc = mean(hits,na.rm = TRUE))
mean_accuracy <- acc_per_item %>%
                    summarise(mean_correct = mean(acc,na.rm = TRUE))

df.stim_acc <- merge(stimuli,acc_per_item,by.x="fullID",by.y="items")
df.stim_acc$acc <- round(df.stim_acc$acc,digits=2)
# Within each pair (grouped by ID), difference between the second and first
# row's accuracy. NOTE(review): lag() depends on the row order within each
# ID group; arrange(ID) fixes the group order but not the within-group order
# (presumably N before V from the merge) -- confirm before interpreting sign.
diff_acc <- df.stim_acc %>% group_by(ID) %>% arrange(ID) %>% mutate(diff_acc= acc -lag(acc))

# Extra aesthetics (N0, Verb, ...) only feed the plotly tooltip below
p <- ggplot(diff_acc, aes(Type,diff_acc,colour=Type,N0=N0,Verb=Verb,N1=N1,Prep=Prep.,Adj=Adj.,N2=N2)) +
    geom_boxplot() +
    geom_jitter(size = 2) +
    ggtitle("difference in accuracy within pairs: contrast Verb-Noun ")+
    labs(y="difference in accuracy (%)",colour="Type")+
    scale_x_discrete(labels = c("Type 1: differ in Verb","Type 2: differ in Noun order"))
p <- ggplotly(p,tooltip =c('N0','Verb','N1','Prep','Adj','N2'))
p

3b. Average reaction times per subject, split for verb/noun and hit(1)/miss(0)

# Mean attachment RT per subject, split by attachment type and hit/miss.
# FIX: added na.rm=TRUE (consistent with the subject-level RT average above);
# without it a single NA RT nulls the whole cell mean.
summary_rts <- df.responses_clean %>%
                 group_by(subjects,attachment,hits) %>%
                   summarise(mean_rt = mean(rt_attachment,na.rm=TRUE)) 
                      
p <- ggplot(data = subset(summary_rts, !is.na(hits)), aes(x = factor(hits), y = mean_rt,subject = subjects,fill=factor(hits))) +
    geom_boxplot() +
    facet_wrap(~attachment) +
    geom_jitter(size = 2) +
    ggtitle("RTs averaged over items")+
    # FIX: factor(hits) has two levels (0/1); labels apply per panel, so only
    # two labels are needed (the previous four were silently truncated)
    scale_x_discrete(labels = c("miss","hit"))
p <- ggplotly(p,tooltip = c("subject","mean_rt"))
p

### 3c. Accuracy for Verb and Noun attached items

3d. Percentage of Verb responses per subject

# Per subject, the share of "Verb" vs "Nomen" responses (rows with any NA
# are dropped first)
attachment_freq <- df.responses_clean %>%
                        na.omit %>%
                          group_by(subjects,response_attachment) %>%
                            summarise(count=n())%>%
                            mutate(freq = count/sum(count))
                           #summarise(percent = length(response_attachment)/162) 
percent_VAresponses <- attachment_freq[attachment_freq$response_attachment=="Verb",]
percent_NAresponses <- attachment_freq[attachment_freq$response_attachment=="Nomen",]
#ggplot(attachment_percent, aes(percent, fill = response_attachment)) + 
#  geom_density(alpha = 0.2)
# NOTE(review): ..density.. is deprecated in current ggplot2; the
# modern equivalent is after_stat(density)
ggplot(attachment_freq, aes(freq, fill = response_attachment)) + geom_histogram(alpha = 0.5, aes(y = ..density..), position = 'identity')

4. Analyse Plausibility ratings

# Count how often each plausibility rating was given, separately for
# Noun- and Verb-attached items (NA ratings dropped).
plausibility_counts <- df.responses_clean %>%
  group_by(attachment) %>%
  count(rating_plausibility) %>%
  na.omit()

# Side-by-side bars per rating; geom_col() is the shorthand for
# geom_bar(stat = "identity")
ggplot(plausibility_counts, aes(rating_plausibility, n, fill = attachment)) +
  geom_col(position = position_dodge())

4b. Reaction times & Plausibility:

# Group RTs by plausibility ratings and attachment
# NOTE(review): no na.rm here, unlike the earlier RT averages -- an NA RT
# nulls the item's cell; confirm whether that is intended.
summary_rts <- df.responses_clean %>%
  group_by(items,attachment,rating_plausibility) %>%
  summarise(mean_rt = mean(rt_attachment)) # does make sense to take mean here?

# One box per plausibility rating, faceted by attachment; the items
# aesthetic only feeds the plotly tooltip
p <- ggplot(summary_rts, aes(x = factor(rating_plausibility), y = mean_rt,items = items,fill=factor(rating_plausibility))) +
  geom_boxplot() +
  facet_wrap(~attachment) +
  ggtitle("RT distribution over items per plausibility bin")
p <- ggplotly(p,tooltip = c("items","mean_rt"))
p